/*!

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

*/


#pragma once


#include <functional>
#include <numeric>
#include <vector>

#include <pyclustering/cluster/data_type.hpp>

#include <pyclustering/utils/metric.hpp>


using namespace pyclustering::utils::metric;


namespace pyclustering {

namespace clst {


using medoids = std::vector<std::size_t>;


/*

@brief PAM BUILD algorithm is designed to find optimal initial medoids for K-Medoids algorithms family, like PAM.

@details The initialization procedure chooses `k` times the point which yields the smallest distance sum of total
          deviation. Complexity of the algorithm is \f$O\left ( n^{2}k \right )\f$, where `n` is the amount
          of points and `k` is the amount of initial medoids to generate.

Implementation based on paper @cite inproceedings::cluster::kmedoids::1.

There is an example where PAM BUILD algorithm is used to generate initial medoids for sample `Tetra` where four clusters are expected.
@code
    // Input data to find initial medoids.
    dataset data = { {3.522979, 5.487981}, {3.768699, 5.364477}, {3.423602, 5.4199}, {3.803905, 5.389491}, {3.93669, 5.663041}, {6.968136, 7.755556}, {6.750795, 7.269541}, {6.593196, 7.850364}, {6.978178, 7.60985}, {6.554487, 7.498119} };

    // Amount of medoids to initialize.
    std::size_t amount_medoids = 2;

    // Initialize medoids using PAM BUILD algorithm.
    medoids initial_medoids;
    pam_build(amount_medoids).initialize(data, initial_medoids);

    // Display initial medoids.
    std::cout << "Initial medoids: [ " << std::endl;
    for (auto medoid : initial_medoids) {
        std::cout << medoid << " ";
    }
    std::cout << "]" << std::endl;
@endcode

The output of the code above:
@code
    Initial medoids: [ 4 8 ]
@endcode

PAM BUILD algorithm provides much more optimal initial medoids in average than kmeans++ based approach and as a
result it reduces amount of iterations that is needed for K-Medoids algorithms family to extract clusters. There is
an illustration where initial medoids are obtained for various samples (initial medoids are marked by blue starts).

@image html pam_build_initial_medoids.png "Fig. 1. Initial medoids for various samples generated by PAM BUILD algorithm."

*/
class pam_build {
private:
    constexpr static std::size_t INVALID_MEDOID = std::numeric_limits<std::size_t>::max();

private:
    using metric = distance_functor<std::vector<double>>;

    using distance_calculator = std::function<double(const std::size_t, const std::size_t)>;

private:
    std::size_t                     m_amount = 0;

    distance_metric<point>          m_metric = distance_metric_factory<point>::euclidean_square();
    mutable distance_calculator     m_calculator;

    mutable std::vector<double>     m_distance_closest_medoid;
    mutable medoids *               m_medoids_ptr   = nullptr;
    mutable dataset const *         m_data_ptr      = nullptr;

public:
    /*

    @brief Default constructor to create PAM BUILD algorithm.

    */
    pam_build() = default;

    /*
    
    @brief    Constructor of PAM BUILD algorithm.

    @param[in] p_amount: amount of medoids that should be initialized.

    */
    explicit pam_build(const std::size_t p_amount);

    /*

    @brief    Constructor of PAM BUILD algorithm.

    @param[in] p_amount: amount of medoids that should be initialized.
    @param[in] p_metric: metric for distance calculation between points.

    */
    pam_build(const std::size_t p_amount, const metric & p_metric);

    /*

    @brief    Destructor of PAM BUILD algorithm.

    */
    ~pam_build() = default;

public:
    /*

    @brief    Performs center initialization process in line algorithm configuration.

    @param[in]  p_data: data points for that medoids are calculated.
    @param[out] p_medoids: initialized medoids for the specified data.

    */
    void initialize(const dataset & p_data, const medoids & p_medoids) const;

    /*

    @brief    Performs center initialization process in line algorithm configuration.

    @param[in]  p_data: data for that medoids are calculated.
    @param[in]  p_type: data type of `p_data` that is processed by this method (`POINTS`, `DISTANCE_MATRIX`).
    @param[out] p_medoids: initialized medoids for the specified data.

    */
    void initialize(const dataset & p_data, const data_t p_type, const medoids & p_medoids) const;

private:
    void calculate_first_medoid() const;

    void calculate_next_medoids() const;

    pam_build::distance_calculator create_distance_calculator(const data_t p_type) const;
};


}

}